; Flat binary assembled for load offset 0x100 (.COM-style layout).
; All later code depends on the entry register values assumed on the next line.
org 100h   ; assume ax=bx=0 si=0x100 di=sp=-2

; ah is 0 under the assumption above, so only al has to be loaded before int 0x10.
  mov al,0x13
  int 0x10     ; set 320x200 mode

;Palette: 8 color gradients
; The first pass writes al to port 0x3c8 (the "init: index=0" write); dl is
; then switched to 0xc9, so every following pass writes to port 0x3c9.
; Per pass: bx += 0x3c, al = 0x3c rotated right by cl, al = (al * bh) >> 9.
; On the first pass bh is still 0, so the value sent to 0x3c8 is 0.
  mov dx,0x3c8
  mov ch,2     ; cx=0x2ff (presumes cl=0xff on entry; not in the header assumptions - TODO confirm)
P mov al,0x3c  ; ah=0 (at entry by assumption; afterwards because ax>>9 is below 0x80)
  add bx,ax    ; bx accumulates 0x3c per pass; bh is the slowly rising multiplier
  ror al,cl    ; rotation count follows the loop counter, which changes every pass
  mul bh       ; ax = al * bh
  shr ax,9     ; scale the 16-bit product down into al
  out dx,al    ; init: index=0
  mov dl,0xc9  ; all later passes write to port 0x3c9
  loop P       ; leaves cx=0; the main loop relies on ch staying 0

  mov bx,0x4000 ; one constant with two uses, see next line
  mov gs,bx    ; bx=gs=0x4000: table segment, sin->cos phase
               ; As a segment value it selects where the sine table is stored.
               ; As a byte offset it is a quarter of the 64 KiB table
               ; (16384 entries * 4 bytes), which turns a sin lookup into cos.
               ; NOTE(review): nothing reserves segment 0x4000; presumably it is
               ; assumed to be free memory - confirm.

  push 0xa000 - 160/16 ; sp=-4
               ; Pushes 0x9ff6, which is popped into es after the sine table loop.
               ; Until then sp=-4 doubles as the step operand of that loop.

;Sine table: 16384 float32 entries (-1..1)
; The word at [di] (di=-2 per the entry assumptions) is the loop variable t.
; It is assumed to start at 0; it is stepped by 4 until it wraps back to 0,
; giving one pass per 4-byte table entry.
; Each pass stores sin(t/10430) at gs:[t+si]. With si=0x100 the entry for t
; therefore lives 0x100 bytes above offset t in the table segment.
; `;|` comments show the x87 stack contents.
  fninit       ; [di]=0
S mov bp,[di]  ; bp = t, used as the store offset below
  fild word[di]                      ;| t
  fidiv word[si-0x100+c65536div2pi]  ;| T=t/65536*2pi
                                     ; si=0x100, so the address is just c65536div2pi
  fsin                               ;| sinT
  fstp dword[gs:bp+si]               ; store the entry and pop; x87 stack is empty again
  sub [di],sp  ; +4
  jnz S        ; bp=-4
               ; On exit [di] is 0 again and bp keeps the last t (-4);
               ; the main loop depends on bp=-4.

; es is 10 paragraphs (160 bytes) below 0xa000, so es:160 is the same byte as a000:0.
  pop es       ; es=0x9ff6: centered screen segment

; Frame loop entry: jumped to once per frame from the end of the file.
M:

;For each pixel: find dX,dY,dZ and initialize X,Y,Z
;si=time di=pixel_address bx=0x4000
; Register pairs below are written high:low.
; 0xcccd/65536 is about 0.8 = 256/320, so after the multiply dx is about
; di*256/320: dh is roughly the row and dl the position inside the row
; scaled to 0..255. The low product word (ax) supplies extra fraction bits.
; The ray direction is never copied anywhere: pusha dumps the registers on the
; stack, and the three overlapping direction words are read straight from there.
X mov ax,0xcccd
  mul di       ; dx:ax = di * 0xcccd
  mov cl,0x7c  ; cl:dh=dZ=0x7c??
               ; cx=0x007c is also the iteration budget of the march loop
  add dh,0x9f  ; dh:dl=dY
               ; 0x9f is -97 as a signed byte: rows near 97 give a dY high byte of 0
  xchg ax,bx   ; dl:bh=dX
               ; ax=0x4000 (old bx), bx=low word of the product
  pusha        ; -10 -9 -8 -7 -6 -5 -4 -3
               ;  bl bh dl dh cl ch al ah
               ;     ( dX )      0  0  0x40
               ;        ( dY )
               ;           ( dZ )
               ; (addresses are valid because sp=-2 before the pusha)

               ; Ray origin, kept in registers for the march:
               ; si=Z=time
  cwd          ; dx=Y=0
               ; ax=X=0x4000
  mov bx,ax    ; bx=0x4000

; Load dX,dY,dZ and rotate dX,dZ by time
; `;|` comments show the x87 stack, st0 first. The four columns are the four
; passes through the loop.
; bx toggles 0x4000 <-> 0 (xor with ax=0x4000) to select cos or sin of the
; angle indexed by si=time. bp toggles -4 <-> -2 to select dX or dZ.
; `jpo` tests the parity of bp's low byte: 0xfe is odd (loop again),
; 0xfc is even (fall through).
                     ;  bx=0x4000 bx=0      bx=0x4000 bx=0
                     ;  bp=-4     bp=-4     bp=-2     bp=-2
L fild word[bp-5]    ;| dX      | dX      | dZ      | dZ
  fmul dword[gs:si+bx];|dX*cosT | dX*sinT | dZ*cosT | dZ*sinT
  xor bx,ax          ; flip between the cos (0x4000) and sin (0) offset
  jz L               ; bx just became 0: do the sin pass for the same component
  xor bp,2           ; switch component: -4 (dX) <-> -2 (dZ)
  jpo L              ;| dZ*s dZ*c dX*s dX*c ; bx=0x4000 bp=-4
  fsubp st3,st0      ; st3 = dX*c - dZ*s, pop
  faddp              ;| dX=dZ*c+dX*s dZ=dX*c-dZ*s
  fild word[bp-4]    ;| dY dX dZ

; si was the rotation angle index up to here; from now on it is the Z coordinate.
  shl si,3     ;Z=time*8

; March loop entry: one pass per step along the ray.
; On entry: ax=X dx=Y si=Z, bx=0x4000, bp=-4, x87 stack = dY dX dZ.
Z:
;Compute the signed distance to the gyroid
; The G loop runs three times (bp: -4 -> -1; the low byte 0xff has even parity,
; so `jpo` falls through). Each pass rotates the coordinates through
; ax/dx/si; after three passes they are back at ax=X dx=Y si=Z.
; The three columns in the `;|` comments are the three passes.
G fld dword[gs:si+bx]  ; cos of the coordinate in si (bx=0x4000 is the cos offset)
  xchg ax,dx   ;| cosZ | cosY | cosX
  xchg ax,si   ; ax=X dx=Y si=Z -> ax=Z dx=X si=Y
  fmul dword[gs:si]    ; times sin of the coordinate now in si
  inc bp       ;| cosZ*sinY | cosY*sinX | cosX*sinZ
  jpo G        ; bp=-1
  faddp
  faddp        ;| d=cosZ*sinY+cosY*sinX+cosX*sinZ
  fst qword[bx]; store d as float64
               ; written to ds:0x4000; byte [bx+7] holds the sign of d, which is
               ; read again when the pixel is drawn
  fabs         ;| |d|
  fsubr dword[cOffset] ;| D=k-|d|: the complement of the gyroid surface
                       ; k is the constant cOffset

;Advance ray by distance
; The A loop runs three times (bp: -1 -> -4; the low byte 0xfc has even parity,
; so `jpo` falls through), once per coordinate in the order Y, X, Z.
; Each pass brings the next direction component to st0, parks a copy in st4 so
; the direction survives the pop, scales it by D and cDistFactor, converts the
; product to a 16-bit integer at [bx] and adds it to the matching coordinate.
; After three passes: x87 stack = D dY dX dZ and ax=X dx=Y si=Z again.
; The register comments below describe the first pass.
               ;| D dY dX dZ
A fxch st1     ;| dY D dX dZ
  fst st4      ;| dY D dX dZ dY
  fmul st1     ;| dY*D D dX dZ dY
  fmul dword[cDistFactor]
  fistp word[bx] 
               ; integer step stored at ds:0x4000 and popped;
               ; the sign byte of d at [bx+7] is not touched
  xchg ax,dx   ; ax=X dx=Y si=Z -> ax=Y dx=X si=Z
  add ax,[bx]  ; Y+=dY*D | X+=dX*D | Z+=dZ*D
  and al,0xfc  ; align to a multiple of 4 (for sine table)
  xchg ax,si   ; ax=Y dx=X si=Z -> ax=Z dx=X si=Y
  dec bp
  jpo A        ; bp=-4

;Are we close enough?
; D is popped to memory, which leaves the x87 stack as dY dX dZ.
; Only the top byte of the float32 (sign and upper exponent bits) is compared.
  fstp dword[bx]; store D as float32
  cmp byte[bx+3],0x3e ; cmp bits(D),0x3e000000
  jl E         ; hit if D<0.125
               ; signed byte compare, so a negative D (top byte >= 0x80) also counts as a hit
  add cx,bp    ; bp=-4: spend one unit of the iteration budget
  jnz Z        ; at most 31 steps: cx starts at 0x7c and drops by 4 (not 32)

;Draw pixel
; Reached on a hit, or when the iteration budget in cx is used up.
; Pixel value = 2*(cl|3) + sign(d), where cl is the remaining budget.
E fcompp       ; used only to pop two direction values; the compare result is never read
  fstp st0     ;|
  xchg ax,cx   ; al = remaining budget (a multiple of 4, 0..0x7c)
  or al,3
  shl byte[bx+7],1 ; carry = sign of d
  adc al,al   ; color = sign of d
              ; al = 2*al + carry: the sign of d becomes the lowest bit
  stosb       ; write the pixel at es:di
  popa        ; restore the registers saved by pusha: di is the pixel address
              ; again (undoing stosb's increment), si=time, bp=-4, cx=0x007c,
              ; ax=0x4000, bx=low product word
  xchg ax,bx  ; put 0x4000 back into bx for the next pixel

;Next pixel
; di runs through all 65536 offsets before wrapping to 0.
; NOTE(review): with es=0x9ff6, offsets 0..159 address the 160 bytes just
; below a000:0 - confirm that writing there is acceptable.
  inc di
  jnz X

;Next frame
; Time advances by 128 per frame, so si stays a multiple of 4, which the
; dword table lookups need.
  sub si,-128  ; time++
               ; adds 128; presumably written as a subtraction because -128
               ; fits a signed 8-bit immediate while +128 does not
  in al,0x60   ; esc check
  dec al       ; zero only when the byte read from port 0x60 was 1
  jnz M
  ret          ; presumably pops the zero word at the top of the stack (the [di]
               ; word the sine loop left at 0) - TODO confirm.
               ; The video mode is not restored before returning.

; Divisor that converts a table byte offset t into an angle in radians
; (used by the sine table loop).
c65536div2pi: dw 10430 ; 65536/2pi
; Step scale applied to direction*D when the ray is advanced.
cDistFactor: dd 0.2 ; (0.75 (Lipschitz constant) / 44700 (avg dir length)) * 65536/2pi
; The k in D = k - |d| of the distance estimate.
cOffset: dd 1.52


;; Palette test
;  push 0xa000
;  pop es
;  xor di,di
;  xor ax,ax
;Y stosb
;  inc al
;  jnz Y
;  add di,64
;  jns Y
;
;  xor ax,ax
;  int 0x16
;  ret

